Nov 3, 2016
plot function (or similar)text, lines, points, axis)xyplot, bwplot, etc.)base and lattice+The Grammar of Graphics by Leland WilkinsonHadley Wickham (while he was a graduate student at Iowa State)base and lattice)Grammar of graphics represents an abstraction of graphics ideas/objectsggplot2 book)mapping from data to aesthetic attributes (colour, shape, size) of geometric objects (points, lines, bars). The plot may also contain statistical transformations of the data and is drawn on a specific coordinate system"data frameaesthetic mappings: how data are mapped to color, size, thickness, …geoms: geometric objects like points, lines, shapesfacets: for conditional plots (subgraph)stats: statistical transformations like binning, quantiles, smoothing.scales: what scale an aesthetic map uses (example: male = red, female = blue)coordinate systemqplot()plot function in base graphics systemaesthetics (size, shape, color) and geoms (points, lines)qplot()labeledqplot() hides what goes on underneath, which is okay for most operationsggplot() is the core function and very flexible for doing things qplot() cannot dolibrary(ggplot2) # qplot(x, y, data) -> scatter plot qplot(carat, price, data=diamonds)
qplot(carat, price, data=diamonds, color=clarity)
# add smooth curve
qplot(carat, price, data=diamonds,
geom=c("point", "smooth"))
qplot(price, data=diamonds, fill=cut)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# facets = . ~ cut (horizontal)
qplot(carat, price, data=diamonds,
facets=.~cut)
# facets = cut ~ . (vertical)
qplot(carat, price, data=diamonds,
facets=cut~.)
qplot(hwy, data = mpg, facets = drv ~ ., binwidth = 2)
qplot() function is the analog to plot() but with many built-in featuresbase/latticexlab(), ylab(), labs(), ggtitle()geom functions has options to modifytheme()theme(legend.position = "none")theme_gray(): The default theme (gray background)theme_bw(): More stark/plain# aes: Aesthetic attributes, color, shape, size, thickness # geom_*: Geometric objects, point, line, boxplot, bar ggplot(diamonds, aes(x = carat, y = price)) + geom_point()
# aes: Aesthetic attributes, color, shape, size, thickness # geom_*: Geometric objects, point, line, boxplot, bar ggplot(diamonds, aes(x = cut, y = price)) + geom_boxplot()
# facet_grid: add subgraph ggplot(diamonds, aes(x = carat, y = price)) + geom_point() + facet_grid(cut~.)
# facet_grid: add subgraph ggplot(diamonds, aes(x = carat, y = price)) + geom_point() + facet_grid(.~cut)
# geom_smooth: method='lm' (linear regression) ggplot(diamonds, aes(x = carat, y = price)) + geom_point() + facet_grid(cut~.) + geom_smooth(method='lm')
# color = cut ggplot(diamonds, aes(x = carat, y = price, color = cut)) + geom_point() + geom_smooth(method='lm')
testdat <- data.frame(x = 1:100, y = rnorm(100)) testdat[50, 2] <- 100 # Outlier! plot(testdat$x, testdat$y, type = "l", ylim = c(-3,3))
g <- ggplot(testdat, aes(x = x, y = y)) g + geom_line()
g + geom_line() + ylim(-3, 3)
g + geom_line() + coord_cartesian(ylim = c(-3, 3))
library(datasets) airquality$Month <- as.factor(airquality$Month) airquality.mean <- aggregate(Ozone~Month, airquality, mean) ggplot() + geom_bar(data=airquality.mean, aes(x=Month, y=Ozone), stat="identity")
airquality.sd <- aggregate(Ozone~Month, airquality, sd)
airquality.eb <- merge(airquality.mean, airquality.sd, by="Month")
ggplot(data=airquality.eb) +
geom_bar(aes(x=Month, y=Ozone.x), stat = "identity")+
geom_errorbar( # Ozone.x = mean, Ozone.y = sd
aes(x=Month, ymin=Ozone.x-Ozone.y, ymax=Ozone.x+Ozone.y), width=.1)
ggplot2 Referenceggplot2 book by Hadley WickhamR Graphics Cookbook by Winston Chang (examples in base plots and in ggplot2)nba <- read.csv("http://datasets.flowingdata.com/ppg2008.csv")
head(nba)
## Name G MIN PTS FGM FGA FGP FTM FTA FTP X3PM X3PA ## 1 Dwyane Wade 79 38.6 30.2 10.8 22.0 0.491 7.5 9.8 0.765 1.1 3.5 ## 2 LeBron James 81 37.7 28.4 9.7 19.9 0.489 7.3 9.4 0.780 1.6 4.7 ## 3 Kobe Bryant 82 36.2 26.8 9.8 20.9 0.467 5.9 6.9 0.856 1.4 4.1 ## 4 Dirk Nowitzki 81 37.7 25.9 9.6 20.0 0.479 6.0 6.7 0.890 0.8 2.1 ## 5 Danny Granger 67 36.2 25.8 8.5 19.1 0.447 6.0 6.9 0.878 2.7 6.7 ## 6 Kevin Durant 74 39.0 25.3 8.9 18.8 0.476 6.1 7.1 0.863 1.3 3.1 ## X3PP ORB DRB TRB AST STL BLK TO PF ## 1 0.317 1.1 3.9 5.0 7.5 2.2 1.3 3.4 2.3 ## 2 0.344 1.3 6.3 7.6 7.2 1.7 1.1 3.0 1.7 ## 3 0.351 1.1 4.1 5.2 4.9 1.5 0.5 2.6 2.3 ## 4 0.359 1.1 7.3 8.4 2.4 0.8 0.8 1.9 2.2 ## 5 0.404 0.7 4.4 5.1 2.7 1.0 1.4 2.5 3.1 ## 6 0.422 1.0 5.5 6.5 2.8 1.3 0.7 3.0 1.8
library(reshape2) # for melt() nba.m <- melt(nba, id.vars = "Name") # wide to long based on name head(nba.m, 10)
## Name variable value ## 1 Dwyane Wade G 79 ## 2 LeBron James G 81 ## 3 Kobe Bryant G 82 ## 4 Dirk Nowitzki G 81 ## 5 Danny Granger G 67 ## 6 Kevin Durant G 74 ## 7 Kevin Martin G 51 ## 8 Al Jefferson G 50 ## 9 Chris Paul G 78 ## 10 Carmelo Anthony G 66
library(ggplot2) # for ggplot()
ggplot(nba.m, aes(variable, Name)) +
geom_tile(aes(fill = value), colour = "white") +
scale_fill_gradient(low = "white", high = "steelblue")
head(nba, 2)
## Name G MIN PTS FGM FGA FGP FTM FTA FTP X3PM X3PA X3PP ## 1 Dwyane Wade 79 38.6 30.2 10.8 22.0 0.491 7.5 9.8 0.765 1.1 3.5 0.317 ## 2 LeBron James 81 37.7 28.4 9.7 19.9 0.489 7.3 9.4 0.780 1.6 4.7 0.344 ## ORB DRB TRB AST STL BLK TO PF ## 1 1.1 3.9 5.0 7.5 2.2 1.3 3.4 2.3 ## 2 1.3 6.3 7.6 7.2 1.7 1.1 3.0 1.7
nba[, 2:21] <- apply(nba[, 2:21], 2, scale) # scale, average to 0 head(nba, 2)
## Name G MIN PTS FGM FGA FGP ## 1 Dwyane Wade 0.6179300 1.0019702 3.179941 2.920022 2.596832 0.5136017 ## 2 LeBron James 0.7693834 0.6119299 2.566974 1.957185 1.697237 0.4649190 ## FTM FTA FTP X3PM X3PA X3PP ## 1 1.917475 2.110772 -0.7401673 -0.1080044 0.1303647 -0.15749098 ## 2 1.778729 1.896589 -0.5233214 0.4920201 0.6971679 0.02738974 ## ORB DRB TRB AST STL BLK TO ## 1 -0.27213551 -0.3465676 -0.3287465 1.652247 2.558238 1.2064646 1.790445 ## 2 -0.06117775 1.0080940 0.6605370 1.516147 1.367252 0.8627425 1.059651 ## PF ## 1 -0.2984568 ## 2 -1.3903719
nba.m <- melt(nba)
ggplot(nba.m, aes(variable, Name)) +
geom_tile(aes(fill = value),colour = "white") +
scale_fill_gradient(low = "white",high = "steelblue")
treemapify packagesif (!require('treemapify')){
library(devtools)
install_github("wilkox/treemapify")
library(treemapify)
}
data(G20)
head(G20)
## Region Country Trade.mil.USD Nom.GDP.mil.USD HDI ## 1 Africa South Africa 208000 384315 0.629 ## 2 North America United States 3969000 15684750 0.937 ## 3 North America Canada 962600 1819081 0.911 ## 4 North America Mexico 756800 1177116 0.775 ## 5 South America Brazil 494800 2395968 0.730 ## 6 South America Argentina 152690 474954 0.811 ## Population Economic.classification ## 1 53000000 Developing ## 2 316173000 Advanced ## 3 34088000 Advanced ## 4 112211789 Developing ## 5 201032714 Developing ## 6 40117096 Developing
# treemapify: convert format
# area, fill, label, group
treeMapCoordinates <- treemapify(data=G20,
area = "Nom.GDP.mil.USD", fill = "HDI",
label = "Country", group = "Region")
head(treeMapCoordinates)
## fill label xmin xmax ymin ymax group ## 1 0.876 European Union 0.00000 38.66972 0.00000 58.99641 Europe ## 2 0.920 Germany 38.66972 63.32079 0.00000 19.17284 Europe ## 3 0.893 France 38.66972 63.32079 19.17284 33.88097 Europe ## 4 0.875 United Kingdom 38.66972 63.32079 33.88097 47.64081 Europe ## 5 0.881 Italy 38.66972 63.32079 47.64081 58.99641 Europe ## 6 0.937 United States 0.00000 53.16491 58.99641 100.00000 North America
ggplotify(treeMapCoordinates)
ggplotify(treeMapCoordinates)+
scale_fill_gradient(low = "white",high = "steelblue")
choroplethr & choroplethrMaps packages rgdal, rgeos,maptools packages for map filesggplot2 & RColorBrewer for plotting## [1] TRUE
shapefile Wikishapefilemaptools package, readShapeSpatial function.packages <- c("rgdal", "rgeos", "maptools", "ggplot2", "gpclib", "RColorBrewer")
.inst <- .packages %in% installed.packages()
if(length(.packages[!.inst]) > 0) suppressMessages(install.packages(.packages[!.inst], repos="http://cran.rstudio.com/"))
invisible(lapply(.packages, library, character.only=TRUE))
tw_new <- readShapeSpatial("Taiwan/Town_MOI_1041215.shp")
names(tw_new)
## [1] "OBJECTID" "T_UID" "Town_ID" "T_Name" "T_Desc" ## [6] "Add_Date" "Add_Accept" "Remark" "County_ID" "C_Name"
rgdal, rgeos,maptoolsfortify: shapefile \(\rightarrow\) data.framehead(tw_new$Town_ID)
## [1] 1001402 1001321 1000913 1001411 1001416 1000712 ## 368 Levels: 0900701 0900702 0900703 0900704 0902001 0902002 ... 6801300
tw_new.df <- fortify(tw_new, region = "T_UID") # from ggplot2 package
head(tw_new.df,10)
## long lat order hole piece id group ## 1 119.9170 26.17518 1 FALSE 1 1 1.1 ## 2 119.9171 26.17517 2 FALSE 1 1 1.1 ## 3 119.9171 26.17518 3 FALSE 1 1 1.1 ## 4 119.9171 26.17518 4 FALSE 1 1 1.1 ## 5 119.9171 26.17518 5 FALSE 1 1 1.1 ## 6 119.9172 26.17518 6 FALSE 1 1 1.1 ## 7 119.9172 26.17518 7 FALSE 1 1 1.1 ## 8 119.9172 26.17518 8 FALSE 1 1 1.1 ## 9 119.9173 26.17515 9 FALSE 1 1 1.1 ## 10 119.9173 26.17515 10 FALSE 1 1 1.1
# Randomly generate some data for plotting
mydata <- data.frame(NAME_2=tw_new$T_Name, id=tw_new$T_UID,
prevalence=rnorm(length(tw_new$T_UID)))
head(mydata)
## NAME_2 id prevalence ## 1 \xa6\xa8\xa5\\\xc2\xed 178 0.5066914 ## 2 \xa8ΥV\xb6m 164 1.8918956 ## 3 \xb3\xc1\xbcd\xb6m 118 0.8782975 ## 4 \xba\xf1\xaeq\xb6m 376 -0.9562235 ## 5 \xc4\xf5\xc0\xac\xb6m 369 0.8223115 ## 6 \xa5Ф\xa4\xc2\xed 78 -0.4283949
# use iconv to convert
# from big5 to utf-8
mydata$NAME_2 <- iconv(as.character(mydata$NAME_2),
from="big5", to="UTF-8")
head(mydata,10)
## NAME_2 id prevalence ## 1 成功鎮 178 0.5066914 ## 2 佳冬鄉 164 1.8918956 ## 3 麥寮鄉 118 0.8782975 ## 4 綠島鄉 376 -0.9562235 ## 5 蘭嶼鄉 369 0.8223115 ## 6 田中鎮 78 -0.4283949 ## 7 社頭鄉 83 -1.0360082 ## 8 竹田鄉 157 0.5036374 ## 9 萬丹鄉 148 0.1431512 ## 10 三灣鄉 64 -0.8861121
final.plot <- merge(tw_new.df, mydata,by="id", all.x=T) head(final.plot,10)
## id long lat order hole piece group NAME_2 prevalence ## 1 1 119.9170 26.17518 1 FALSE 1 1.1 南竿鄉 1.473039 ## 2 1 119.9171 26.17517 2 FALSE 1 1.1 南竿鄉 1.473039 ## 3 1 119.9171 26.17518 3 FALSE 1 1.1 南竿鄉 1.473039 ## 4 1 119.9171 26.17518 4 FALSE 1 1.1 南竿鄉 1.473039 ## 5 1 119.9171 26.17518 5 FALSE 1 1.1 南竿鄉 1.473039 ## 6 1 119.9172 26.17518 6 FALSE 1 1.1 南竿鄉 1.473039 ## 7 1 119.9172 26.17518 7 FALSE 1 1.1 南竿鄉 1.473039 ## 8 1 119.9172 26.17518 8 FALSE 1 1.1 南竿鄉 1.473039 ## 9 1 119.9173 26.17515 9 FALSE 1 1.1 南竿鄉 1.473039 ## 10 1 119.9173 26.17515 10 FALSE 1 1.1 南竿鄉 1.473039
library(RColorBrewer)
twcmap <- ggplot() +
geom_polygon(data = final.plot,
aes(x = long, y = lat, group = group,
fill = prevalence),
color = "black", size = 0.25) +
coord_map()+
scale_fill_gradientn(colours = brewer.pal(9,"Reds"))+
theme_void()+
labs(title="Prevalence of X in Taiwan")
twcmap
ggmap package ggmap (import Google map)if (!require('ggmap')){
install.packages("ggmap")
library(ggmap)
}
twmap <- get_map(location='Taiwan', zoom=7, language="en-US")
# location: name or long/lat
# zoom: 2-20
# language: map language
ggmap (plot)ggmap(twmap)
ggmap (maptype)# maptype: "terrain", "terrain-background", "satellite",
# "roadmap","hybrid" (google maps), "terrain", "watercolor",
# "toner" (stamen maps),
# or a positive integer for cloudmade maps (see ?get_cloudmademap)
tpmap <- get_map(location = c(121.43, 24.93, 121.62, 25.19),
zoom = 14, maptype = 'roadmap')
ggmap(tpmap, extent = 'device') # extent='device': full screen
ggmap + choroplethggmap(twmap) + # ggmap
geom_polygon(data = final.plot,
aes(x = long, y = lat, group = group, fill = prevalence),
color = "grey80", size = 0.1,alpha = 0.5) +
scale_fill_gradientn(colours = brewer.pal(9,"Reds"))
ggplot2 + ggmap# Get location of each state
StateCenter <- data.frame(
region=tolower(state.name),
lon=state.center$x,
lat=state.center$y)
head(StateCenter, 1)
## region lon lat ## 1 alabama -86.7509 32.5901
# Population data StatePop <- merge(df_pop_state, StateCenter, by="region") head(StatePop, 1)
## region value lon lat ## 1 alabama 4777326 -86.7509 32.5901
# Value to point
PopPoint <- NULL
for(i in 1:nrow(StatePop)){
# 1M people -> one point
for(j in 1:(StatePop[i, "value"] / 1000000)){
PopPoint <- rbind(PopPoint, StatePop[i, ])
}
}
head(PopPoint, 3)
## region value lon lat ## 1 alabama 4777326 -86.7509 32.5901 ## 2 alabama 4777326 -86.7509 32.5901 ## 3 alabama 4777326 -86.7509 32.5901
USMap <- get_map(location = "United States", zoom = 4)
densityMap <- ggmap(USMap, extent = "device") +
geom_density2d(data = PopPoint, aes(x = lon, y = lat), size = 0.3) +
stat_density2d(data = PopPoint,
aes(x = lon, y = lat, fill = ..level.., alpha = ..level..),
size = 0.01, bins = 16, geom = "polygon") +
scale_fill_gradient(low = "green", high = "red", guide = FALSE) +
scale_alpha(range = c(0, 0.3), guide = FALSE)
densityMap
ggmap Referenceggplot(Data)geom_line(), geom_bar(), geom_point()ggplot(Data, aes(x=1, y=1, color='black'))scale_fill_continuous(), scale_fill_grey()stat_density2d()